In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn import metrics

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

%matplotlib inline

import warnings
warnings.filterwarnings("ignore")
In [2]:
# Load the vehicle dataset from a CSV file located in the working directory.
df= pd.read_csv('vehicle.csv')
In [3]:
# Preview the first ten rows of the raw frame.
df.head(10)
Out[3]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
5 107 NaN 106.0 172.0 50.0 6 255.0 26.0 28.0 169 280.0 957.0 264.0 85.0 5.0 9.0 181.0 183 bus
6 97 43.0 73.0 173.0 65.0 6 153.0 42.0 19.0 143 176.0 361.0 172.0 66.0 13.0 1.0 200.0 204 bus
7 90 43.0 66.0 157.0 65.0 9 137.0 48.0 18.0 146 162.0 281.0 164.0 67.0 3.0 3.0 193.0 202 van
8 86 34.0 62.0 140.0 61.0 7 122.0 54.0 17.0 127 141.0 223.0 112.0 64.0 2.0 14.0 200.0 208 van
9 93 44.0 98.0 NaN 62.0 11 183.0 36.0 22.0 146 202.0 505.0 152.0 64.0 4.0 14.0 195.0 204 car
In [4]:
# (number of rows, number of columns) of the raw frame.
df.shape
Out[4]:
(846, 19)
In [5]:
# Column dtypes and non-null counts; columns with fewer non-null entries than rows contain missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   compactness                  846 non-null    int64  
 1   circularity                  841 non-null    float64
 2   distance_circularity         842 non-null    float64
 3   radius_ratio                 840 non-null    float64
 4   pr.axis_aspect_ratio         844 non-null    float64
 5   max.length_aspect_ratio      846 non-null    int64  
 6   scatter_ratio                845 non-null    float64
 7   elongatedness                845 non-null    float64
 8   pr.axis_rectangularity       843 non-null    float64
 9   max.length_rectangularity    846 non-null    int64  
 10  scaled_variance              843 non-null    float64
 11  scaled_variance.1            844 non-null    float64
 12  scaled_radius_of_gyration    844 non-null    float64
 13  scaled_radius_of_gyration.1  842 non-null    float64
 14  skewness_about               840 non-null    float64
 15  skewness_about.1             845 non-null    float64
 16  skewness_about.2             845 non-null    float64
 17  hollows_ratio                846 non-null    int64  
 18  class                        846 non-null    object 
dtypes: float64(14), int64(4), object(1)
memory usage: 125.7+ KB
In [6]:
# Number of missing values in each column.
df.isnull().sum()
Out[6]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [7]:
# Summary statistics, transposed so that each feature occupies one row.
df.describe().T
Out[7]:
count mean std min 25% 50% 75% max
compactness 846.0 93.678487 8.234474 73.0 87.00 93.0 100.0 119.0
circularity 841.0 44.828775 6.152172 33.0 40.00 44.0 49.0 59.0
distance_circularity 842.0 82.110451 15.778292 40.0 70.00 80.0 98.0 112.0
radius_ratio 840.0 168.888095 33.520198 104.0 141.00 167.0 195.0 333.0
pr.axis_aspect_ratio 844.0 61.678910 7.891463 47.0 57.00 61.0 65.0 138.0
max.length_aspect_ratio 846.0 8.567376 4.601217 2.0 7.00 8.0 10.0 55.0
scatter_ratio 845.0 168.901775 33.214848 112.0 147.00 157.0 198.0 265.0
elongatedness 845.0 40.933728 7.816186 26.0 33.00 43.0 46.0 61.0
pr.axis_rectangularity 843.0 20.582444 2.592933 17.0 19.00 20.0 23.0 29.0
max.length_rectangularity 846.0 147.998818 14.515652 118.0 137.00 146.0 159.0 188.0
scaled_variance 843.0 188.631079 31.411004 130.0 167.00 179.0 217.0 320.0
scaled_variance.1 844.0 439.494076 176.666903 184.0 318.00 363.5 587.0 1018.0
scaled_radius_of_gyration 844.0 174.709716 32.584808 109.0 149.00 173.5 198.0 268.0
scaled_radius_of_gyration.1 842.0 72.447743 7.486190 59.0 67.00 71.5 75.0 135.0
skewness_about 840.0 6.364286 4.920649 0.0 2.00 6.0 9.0 22.0
skewness_about.1 845.0 12.602367 8.936081 0.0 5.00 11.0 19.0 41.0
skewness_about.2 845.0 188.919527 6.155809 176.0 184.00 188.0 193.0 206.0
hollows_ratio 846.0 195.632388 7.438797 181.0 190.25 197.0 201.0 211.0
In [8]:
# Every column except the last holds a numeric feature;
# the last column holds the class label.
df_numeric = df.iloc[:, :-1]
df_nonnumeric = df.iloc[:, -1]
In [9]:
# Preview the feature-only frame (label column removed).
df_numeric.head()
Out[9]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183
In [10]:
# Median of each feature column.
df_numeric.median()
Out[10]:
compactness                     93.0
circularity                     44.0
distance_circularity            80.0
radius_ratio                   167.0
pr.axis_aspect_ratio            61.0
max.length_aspect_ratio          8.0
scatter_ratio                  157.0
elongatedness                   43.0
pr.axis_rectangularity          20.0
max.length_rectangularity      146.0
scaled_variance                179.0
scaled_variance.1              363.5
scaled_radius_of_gyration      173.5
scaled_radius_of_gyration.1     71.5
skewness_about                   6.0
skewness_about.1                11.0
skewness_about.2               188.0
hollows_ratio                  197.0
dtype: float64
In [11]:
# Column labels of the feature-only frame.
column = df_numeric.columns
In [12]:
# Number of feature columns.
length = len(column)
In [13]:
# Replace each feature's missing values with that feature's median.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selected from the frame: that chained in-place form mutates a
# temporary object, triggers SettingWithCopyWarning, and leaves df_numeric
# unchanged when pandas Copy-on-Write is active.
df_numeric = df_numeric.fillna(df_numeric.median())
In [14]:
# Skewness of every numeric column of the raw frame.
# The string-valued 'class' column is excluded explicitly; recent pandas
# versions raise instead of silently dropping non-numeric columns.
df.select_dtypes(include='number').skew()
Out[14]:
compactness                    0.381271
circularity                    0.261809
distance_circularity           0.106585
radius_ratio                   0.394978
pr.axis_aspect_ratio           3.830362
max.length_aspect_ratio        6.778394
scatter_ratio                  0.607271
elongatedness                  0.047847
pr.axis_rectangularity         0.770889
max.length_rectangularity      0.256359
scaled_variance                0.651598
scaled_variance.1              0.842034
scaled_radius_of_gyration      0.279317
scaled_radius_of_gyration.1    2.083496
skewness_about                 0.776519
skewness_about.1               0.688017
skewness_about.2               0.249321
hollows_ratio                 -0.226341
dtype: float64
In [15]:
from sklearn.preprocessing import PowerTransformer
# Power transformer with its default configuration, applied in the next cell to the numeric features.
pt = PowerTransformer()
In [16]:
# Fit the power transformer and transform the features in one step
# (a separate fit() beforehand would be repeated by fit_transform()).
# Column labels and the row index are carried over so the transformed frame
# lines up with the label series when they are concatenated later.
# NOTE(review): the transformer is fitted on all rows, before any train/test
# split, so the later test split influences the fitted parameters.
orig_df = pd.DataFrame(
    pt.fit_transform(df_numeric),
    columns=df_numeric.columns,
    index=df_numeric.index,
)
orig_df.head()
Out[16]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 0.250889 0.580135 0.097253 0.349942 1.460002 0.747156 -0.041313 0.163051 -0.019052 0.792459 -0.293757 -0.118211 0.349953 -0.209612 0.148898 0.535243 -0.261662 0.133806
1 -0.247957 -0.576832 0.160094 -0.816888 -0.648789 0.435588 -0.511247 0.539220 -0.525853 -0.284310 -0.522119 -0.504364 -0.461911 0.117730 0.681443 0.342020 0.071166 0.414230
2 1.227603 0.873652 1.471555 1.180118 0.793256 0.747156 1.155070 -1.152131 1.045385 0.729773 1.114092 1.149019 1.341615 0.268943 1.379928 -0.215881 -0.093673 -0.003447
3 0.007051 -0.576832 0.034093 -0.220848 0.383072 0.435588 -0.712978 0.662983 -0.525853 -0.284310 -0.935944 -0.695261 -1.574168 -1.686670 0.148898 -0.093508 1.561779 1.617513
4 -1.088077 -0.053863 -0.750785 1.079043 3.209391 3.973288 -0.511247 0.539220 -0.525853 -0.212040 1.523408 -0.548251 0.467024 3.312080 0.681443 0.022763 -1.532626 -1.616862
In [17]:
# Skewness of each feature after the power transform.
orig_df.skew()
Out[17]:
compactness                    0.020835
circularity                    0.009902
distance_circularity          -0.029340
radius_ratio                  -0.006298
pr.axis_aspect_ratio          -0.099316
max.length_aspect_ratio       -0.183269
scatter_ratio                  0.049224
elongatedness                 -0.043290
pr.axis_rectangularity         0.124225
max.length_rectangularity      0.011789
scaled_variance                0.024486
scaled_variance.1              0.046460
scaled_radius_of_gyration     -0.009842
scaled_radius_of_gyration.1    0.001559
skewness_about                -0.086317
skewness_about.1              -0.096120
skewness_about.2               0.019008
hollows_ratio                 -0.040011
dtype: float64
In [18]:
# Put the class labels back next to the transformed features (side by side).
df = pd.concat(objs=[orig_df, df_nonnumeric], axis='columns')
In [19]:
# Preview the transformed features together with the class label.
df.head()
Out[19]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 0.250889 0.580135 0.097253 0.349942 1.460002 0.747156 -0.041313 0.163051 -0.019052 0.792459 -0.293757 -0.118211 0.349953 -0.209612 0.148898 0.535243 -0.261662 0.133806 van
1 -0.247957 -0.576832 0.160094 -0.816888 -0.648789 0.435588 -0.511247 0.539220 -0.525853 -0.284310 -0.522119 -0.504364 -0.461911 0.117730 0.681443 0.342020 0.071166 0.414230 van
2 1.227603 0.873652 1.471555 1.180118 0.793256 0.747156 1.155070 -1.152131 1.045385 0.729773 1.114092 1.149019 1.341615 0.268943 1.379928 -0.215881 -0.093673 -0.003447 car
3 0.007051 -0.576832 0.034093 -0.220848 0.383072 0.435588 -0.712978 0.662983 -0.525853 -0.284310 -0.935944 -0.695261 -1.574168 -1.686670 0.148898 -0.093508 1.561779 1.617513 van
4 -1.088077 -0.053863 -0.750785 1.079043 3.209391 3.973288 -0.511247 0.539220 -0.525853 -0.212040 1.523408 -0.548251 0.467024 3.312080 0.681443 0.022763 -1.532626 -1.616862 bus
In [20]:
# Dtypes and non-null counts of the rebuilt frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   compactness                  846 non-null    float64
 1   circularity                  846 non-null    float64
 2   distance_circularity         846 non-null    float64
 3   radius_ratio                 846 non-null    float64
 4   pr.axis_aspect_ratio         846 non-null    float64
 5   max.length_aspect_ratio      846 non-null    float64
 6   scatter_ratio                846 non-null    float64
 7   elongatedness                846 non-null    float64
 8   pr.axis_rectangularity       846 non-null    float64
 9   max.length_rectangularity    846 non-null    float64
 10  scaled_variance              846 non-null    float64
 11  scaled_variance.1            846 non-null    float64
 12  scaled_radius_of_gyration    846 non-null    float64
 13  scaled_radius_of_gyration.1  846 non-null    float64
 14  skewness_about               846 non-null    float64
 15  skewness_about.1             846 non-null    float64
 16  skewness_about.2             846 non-null    float64
 17  hollows_ratio                846 non-null    float64
 18  class                        846 non-null    object 
dtypes: float64(18), object(1)
memory usage: 125.7+ KB
In [21]:
# Pairwise plots of all features, coloured by the 'class' column.
sns.pairplot(df, hue = 'class')
Out[21]:
<seaborn.axisgrid.PairGrid at 0x1619037d6a0>
In [22]:
# Pairwise correlation between the numeric features.
# The string-valued 'class' column is excluded explicitly; recent pandas
# versions raise on non-numeric columns instead of dropping them.
df.select_dtypes(include='number').corr()
Out[22]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
compactness 1.000000 0.648513 0.777662 0.707969 0.161585 0.359017 0.771602 -0.784543 0.776425 0.654525 0.741832 0.775228 0.543655 -0.271115 0.197084 0.143729 0.364105 0.390185
circularity 0.648513 1.000000 0.780452 0.614104 0.189088 0.458461 0.827850 -0.816120 0.828516 0.962441 0.792711 0.821620 0.932274 0.105080 0.138196 -0.030641 -0.078661 0.029405
distance_circularity 0.777662 0.780452 1.000000 0.786261 0.233360 0.554384 0.914529 -0.911667 0.911610 0.766775 0.884154 0.913681 0.690531 -0.205292 0.084505 0.241224 0.192830 0.324180
radius_ratio 0.707969 0.614104 0.786261 1.000000 0.685795 0.485509 0.777654 -0.804529 0.761803 0.567679 0.820374 0.790084 0.525666 -0.293409 0.031756 0.157922 0.449663 0.492371
pr.axis_aspect_ratio 0.161585 0.189088 0.233360 0.685795 1.000000 0.308149 0.207715 -0.241712 0.189232 0.144730 0.275515 0.225100 0.133556 -0.196320 -0.051683 -0.026461 0.392798 0.403622
max.length_aspect_ratio 0.359017 0.458461 0.554384 0.485509 0.308149 1.000000 0.413294 -0.390265 0.416005 0.538460 0.430869 0.401119 0.326457 -0.197144 0.026354 0.112037 0.112418 0.357824
scatter_ratio 0.771602 0.827850 0.914529 0.777654 0.207715 0.413294 1.000000 -0.993486 0.986466 0.791645 0.959768 0.994745 0.773461 -0.017400 0.047472 0.189254 0.071914 0.151573
elongatedness -0.784543 -0.816120 -0.911667 -0.804529 -0.241712 -0.390265 -0.993486 1.000000 -0.981512 -0.773461 -0.963479 -0.996271 -0.760251 0.055649 -0.046397 -0.172487 -0.134561 -0.195126
pr.axis_rectangularity 0.776425 0.828516 0.911610 0.761803 0.189232 0.416005 0.986466 -0.981512 1.000000 0.798061 0.944046 0.984235 0.772019 -0.013099 0.054186 0.192377 0.055813 0.142379
max.length_rectangularity 0.654525 0.962441 0.766775 0.567679 0.144730 0.538460 0.791645 -0.773461 0.798061 1.000000 0.748092 0.782676 0.871857 0.083790 0.127713 -0.010877 -0.070827 0.064959
scaled_variance 0.741832 0.792711 0.884154 0.820374 0.275515 0.430869 0.959768 -0.963479 0.944046 0.748092 1.000000 0.962269 0.756536 0.045770 0.016167 0.176640 0.086126 0.126152
scaled_variance.1 0.775228 0.821620 0.913681 0.790084 0.225100 0.401119 0.994745 -0.996271 0.984235 0.782676 0.962269 1.000000 0.766812 -0.034191 0.045544 0.178169 0.102986 0.172951
scaled_radius_of_gyration 0.543655 0.932274 0.690531 0.525666 0.133556 0.326457 0.773461 -0.760251 0.772019 0.871857 0.756536 0.766812 1.000000 0.254500 0.171176 -0.069807 -0.210354 -0.135990
scaled_radius_of_gyration.1 -0.271115 0.105080 -0.205292 -0.293409 -0.196320 -0.197144 -0.017400 0.055649 -0.013099 0.083790 0.045770 -0.034191 0.254500 1.000000 -0.078408 -0.101010 -0.846691 -0.889320
skewness_about 0.197084 0.138196 0.084505 0.031756 -0.051683 0.026354 0.047472 -0.046397 0.054186 0.127713 0.016167 0.045544 0.171176 -0.078408 1.000000 -0.032580 0.097731 0.070006
skewness_about.1 0.143729 -0.030641 0.241224 0.157922 -0.026461 0.112037 0.189254 -0.172487 0.192377 -0.010877 0.176640 0.178169 -0.069807 -0.101010 -0.032580 1.000000 0.069218 0.175997
skewness_about.2 0.364105 -0.078661 0.192830 0.449663 0.392798 0.112418 0.071914 -0.134561 0.055813 -0.070827 0.086126 0.102986 -0.210354 -0.846691 0.097731 0.069218 1.000000 0.901292
hollows_ratio 0.390185 0.029405 0.324180 0.492371 0.403622 0.357824 0.151573 -0.195126 0.142379 0.064959 0.126152 0.172951 -0.135990 -0.889320 0.070006 0.175997 0.901292 1.000000
In [23]:
# Support-vector classifier created with scikit-learn's default hyperparameters.
clf=svm.SVC()
In [24]:
# Features: all columns but the last. Target: the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]
In [25]:
# Hold out 30% of the rows for evaluation; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)
In [26]:
# Train the classifier on the training split.
clf.fit(X_train, y_train)
Out[26]:
SVC()
In [27]:
# Score the fitted classifier on the held-out split.
clf.score(X_test, y_test)
Out[27]:
0.9566929133858267
In [28]:
# Predict labels for the held-out rows, then compare them with the true labels.
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)
Out[28]:
0.9566929133858267
In [29]:
# Cross-tabulation of true labels (rows) against predicted labels (columns).
pd.crosstab(y_test, y_pred)
Out[29]:
col_0 bus car van
class
bus 59 0 0
car 1 128 4
van 4 2 56
In [30]:
# Per-class precision, recall and F1 on the held-out split.
print(metrics.classification_report(y_test, y_pred))
              precision    recall  f1-score   support

         bus       0.92      1.00      0.96        59
         car       0.98      0.96      0.97       133
         van       0.93      0.90      0.92        62

    accuracy                           0.96       254
   macro avg       0.95      0.96      0.95       254
weighted avg       0.96      0.96      0.96       254

In [31]:
# Same feature/target selection as before, under new names for the PCA section.
X1, y1 = df.iloc[:, :-1], df.iloc[:, -1]
In [32]:
# Standardize the features, then compute the feature-by-feature covariance
# matrix (np.cov treats rows as variables, hence the transpose).
# NOTE(review): the scaler is fitted on all rows, before the train/test split.
sc = StandardScaler()
X_std = sc.fit_transform(X1)
cov_matrix = np.cov(X_std.T)
# Interpolate the matrix into the message with '%'; passing it as a second
# print() argument would leave a literal "%s" in the output.
print('Covariance Matrix \n%s' % (cov_matrix,))
Covariance Matrix 
%s [[ 1.00118343  0.64928071  0.77858213  0.70880729  0.16177611  0.35944219
   0.77251518 -0.78547172  0.77734376  0.65530004  0.7427099   0.77614515
   0.54429887 -0.27143577  0.19731709  0.14389942  0.36453607  0.39064704]
 [ 0.64928071  1.00118343  0.78137544  0.61483087  0.1893121   0.45900331
   0.82882941 -0.81708621  0.82949691  0.96358033  0.79364869  0.82259206
   0.93337754  0.1052041   0.13835956 -0.0306774  -0.0787538   0.02943942]
 [ 0.77858213  0.78137544  1.00118343  0.78719134  0.23363637  0.55504022
   0.91561086 -0.91274554  0.91268863  0.76768225  0.88520049  0.91476224
   0.69134771 -0.20553513  0.08460481  0.24150986  0.19305789  0.32456351]
 [ 0.70880729  0.61483087  0.78719134  1.00118343  0.68660665  0.48608324
   0.77857472 -0.80548087  0.76270496  0.56835048  0.8213446   0.79101898
   0.52628834 -0.29375624  0.03179368  0.1581088   0.45019476  0.49295344]
 [ 0.16177611  0.1893121   0.23363637  0.68660665  1.00118343  0.30851409
   0.20796094 -0.24199761  0.18945636  0.14490169  0.27584132  0.22536673
   0.1337144  -0.19655254 -0.0517439  -0.02649184  0.39326274  0.40409996]
 [ 0.35944219  0.45900331  0.55504022  0.48608324  0.30851409  1.00118343
   0.41378358 -0.39072676  0.41649714  0.53909765  0.43137878  0.40159375
   0.32684376 -0.1973776   0.02638484  0.11216958  0.11255057  0.35824721]
 [ 0.77251518  0.82882941  0.91561086  0.77857472  0.20796094  0.41378358
   1.00118343 -0.99466158  0.98763363  0.79258191  0.9609034   0.99592191
   0.77437624 -0.01742101  0.04752839  0.1894784   0.07199898  0.15175228]
 [-0.78547172 -0.81708621 -0.91274554 -0.80548087 -0.24199761 -0.39072676
  -0.99466158  1.00118343 -0.98267372 -0.77437648 -0.96461898 -0.99745031
  -0.76115096  0.05571512 -0.04645186 -0.17269111 -0.13471998 -0.19535697]
 [ 0.77734376  0.82949691  0.91268863  0.76270496  0.18945636  0.41649714
   0.98763363 -0.98267372  1.00118343  0.7990059   0.94516304  0.98539989
   0.77293273 -0.01311419  0.05424988  0.19260506  0.05587928  0.1425479 ]
 [ 0.65530004  0.96358033  0.76768225  0.56835048  0.14490169  0.53909765
   0.79258191 -0.77437648  0.7990059   1.00118343  0.74897684  0.78360226
   0.87288868  0.08388954  0.12786409 -0.01088969 -0.07091129  0.06503559]
 [ 0.7427099   0.79364869  0.88520049  0.8213446   0.27584132  0.43137878
   0.9609034  -0.96461898  0.94516304  0.74897684  1.00118343  0.96340814
   0.75743093  0.04582379  0.01618583  0.176849    0.0862275   0.12630146]
 [ 0.77614515  0.82259206  0.91476224  0.79101898  0.22536673  0.40159375
   0.99592191 -0.99745031  0.98539989  0.78360226  0.96340814  1.00118343
   0.76771974 -0.03423153  0.04559762  0.17837992  0.10310782  0.17315527]
 [ 0.54429887  0.93337754  0.69134771  0.52628834  0.1337144   0.32684376
   0.77437624 -0.76115096  0.77293273  0.87288868  0.75743093  0.76771974
   1.00118343  0.25480132  0.17137877 -0.06989    -0.21060282 -0.13615104]
 [-0.27143577  0.1052041  -0.20553513 -0.29375624 -0.19655254 -0.1973776
  -0.01742101  0.05571512 -0.01311419  0.08388954  0.04582379 -0.03423153
   0.25480132  1.00118343 -0.07850087 -0.10112969 -0.84769285 -0.89037257]
 [ 0.19731709  0.13835956  0.08460481  0.03179368 -0.0517439   0.02638484
   0.04752839 -0.04645186  0.05424988  0.12786409  0.01618583  0.04559762
   0.17137877 -0.07850087  1.00118343 -0.03261896  0.09784631  0.07008885]
 [ 0.14389942 -0.0306774   0.24150986  0.1581088  -0.02649184  0.11216958
   0.1894784  -0.17269111  0.19260506 -0.01088969  0.176849    0.17837992
  -0.06989    -0.10112969 -0.03261896  1.00118343  0.06929987  0.17620544]
 [ 0.36453607 -0.0787538   0.19305789  0.45019476  0.39326274  0.11255057
   0.07199898 -0.13471998  0.05587928 -0.07091129  0.0862275   0.10310782
  -0.21060282 -0.84769285  0.09784631  0.06929987  1.00118343  0.90235828]
 [ 0.39064704  0.02943942  0.32456351  0.49295344  0.40409996  0.35824721
   0.15175228 -0.19535697  0.1425479   0.06503559  0.12630146  0.17315527
  -0.13615104 -0.89037257  0.07008885  0.17620544  0.90235828  1.00118343]]
In [33]:
# Eigen-decomposition of the covariance matrix; column k of `eigenvectors`
# is the eigenvector belonging to eigenvalues[k].
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
# Interpolate with '%' so the arrays replace the placeholder instead of a
# literal "%s" being printed in front of them.
print('Eigen Vectors \n%s' % (eigenvectors,))
print('\n Eigen Values \n%s' % (eigenvalues,))
Eigen Vectors 
%s [[-2.65249806e-01 -1.03022012e-01  2.41878917e-01 -1.68212796e-02
  -1.48092118e-01 -5.94202136e-02  1.61229043e-01 -8.27249133e-01
  -2.58635515e-01 -1.11120054e-01 -1.43186585e-01 -8.28627981e-02
  -7.26179018e-02 -9.62242849e-02 -4.81204894e-02  3.01778484e-02
  -4.75830215e-03  7.25499175e-03]
 [-2.83739997e-01  1.55767519e-01 -5.11666331e-03 -2.10371203e-01
   8.27583413e-02 -1.03537138e-01 -4.09070451e-01 -2.66692189e-02
  -8.33878765e-03  7.15939886e-02  4.64365455e-02  2.00194517e-01
   1.07243361e-01 -5.16262686e-01  5.72840539e-01  1.03747196e-01
  -2.91190097e-02 -1.05685825e-02]
 [-3.04039660e-01 -3.62756689e-02  7.30756307e-02  9.11467591e-02
   8.15573542e-02 -7.43393903e-02  1.15297741e-01  1.83306358e-01
  -3.74158779e-01 -2.76833875e-01  7.50965614e-01 -1.67097117e-01
  -1.49596638e-01  1.89889841e-02  3.61053590e-02 -2.78687230e-02
   8.49468526e-03 -1.13446286e-02]
 [-2.74143941e-01 -1.88579602e-01 -2.50161489e-01  2.92723167e-03
  -8.66319967e-02  2.49671105e-01  9.85828727e-02 -4.62828791e-02
  -2.13286630e-02 -2.35389193e-01 -1.87181840e-02  2.81435979e-01
   5.71069770e-01  3.33222855e-01  2.28797987e-01 -3.45706449e-01
  -2.56958203e-03  2.33720809e-02]
 [-1.04195716e-01 -2.35493808e-01 -6.21822552e-01 -1.71347341e-01
   6.14117485e-03  5.09172018e-01 -1.28469587e-01 -5.57417052e-02
  -2.56502633e-01  1.61656982e-01 -1.44759084e-02 -7.94355783e-02
  -2.78218773e-01 -1.29023261e-01 -1.47161022e-01  1.56419453e-01
  -3.57653719e-03 -1.10569840e-02]
 [-1.71617997e-01 -1.00205131e-01 -1.97812492e-01 -1.07865539e-01
   7.77470493e-01 -2.22541002e-01  3.95770363e-01 -3.60789181e-02
   1.56742463e-01 -7.58563548e-02 -1.79499328e-01  3.45267306e-02
  -1.59925512e-01 -1.16669863e-02  7.31628466e-02 -5.11752360e-02
   1.35208987e-02  1.48123343e-02]
 [-3.11894600e-01  6.25928350e-02  3.77814890e-02  1.17105313e-01
  -1.11904043e-01 -2.12654014e-03  1.17360308e-01  1.58479902e-01
  -5.98079998e-04  1.99553125e-01 -1.53314918e-01 -5.47931719e-02
  -7.96429394e-02 -1.14694428e-01 -7.82110802e-02 -2.90108604e-01
  -2.68058316e-01 -7.64384611e-01]
 [ 3.12501845e-01 -3.49218209e-02 -2.80012452e-02 -1.04918257e-01
   1.64255501e-01 -8.70690555e-03 -1.05623879e-01 -1.53526772e-01
  -6.71882382e-02 -1.65717947e-01  1.24704245e-01  5.60312141e-02
   1.07867560e-01  1.61356773e-01  4.40789132e-02  2.48698472e-01
  -8.09360912e-01 -1.42656483e-01]
 [-3.10107352e-01  6.86945400e-02  5.36072098e-02  1.15259740e-01
  -9.51299321e-02 -1.33933958e-02  1.12706693e-01  1.23631472e-01
  -8.73177256e-02  2.57149600e-01 -1.81787124e-01 -9.05204800e-02
  -1.09428357e-01  5.27499115e-01  3.78346077e-01  5.45288179e-01
   1.24450218e-03  5.66774283e-03]
 [-2.75756169e-01  1.41079995e-01  1.77903514e-02 -2.06055603e-01
   1.98091375e-01 -1.89854559e-01 -3.68938048e-01 -1.75738029e-01
   4.82272218e-02  4.30702886e-01  2.41037351e-01  2.27439863e-01
   9.98227668e-02  3.36621138e-01 -4.38728528e-01 -6.55216577e-02
   3.12646668e-02  1.74664602e-02]
 [-3.05662231e-01  6.13597246e-02 -5.41714358e-02  1.19085663e-01
  -1.11925478e-01  7.19007891e-02  2.06212530e-01  5.83492139e-02
   3.73128658e-01 -2.30475022e-01  8.53938270e-02  2.29644351e-01
   2.22819069e-01 -2.75225859e-01 -3.88118080e-01  5.40606731e-01
  -2.58420584e-02 -2.71164327e-02]
 [-3.12154429e-01  4.94496819e-02  3.08405978e-02  1.10519172e-01
  -1.39948022e-01  2.23898254e-03  1.13704640e-01  1.64553402e-01
   4.51769257e-02  1.93787554e-01 -1.15036192e-01 -7.79965221e-02
  -1.09003427e-01 -1.41164556e-01 -7.79128557e-02 -2.67335789e-01
  -5.18284330e-01  6.24878921e-01]
 [-2.58464283e-01  2.35717337e-01  6.86194888e-05 -2.21617995e-01
   2.31352459e-03  5.93323082e-03 -4.13924835e-01  1.09529528e-01
   9.13331285e-02 -6.04624532e-01 -3.06478343e-01 -2.93737636e-01
  -1.71419057e-01  2.00037262e-01 -1.37304989e-01 -5.23628433e-02
  -5.07417276e-03 -2.22617488e-03]
 [ 3.59699918e-02  4.95044359e-01 -1.89384724e-01  4.70017592e-02
   1.55667117e-02  2.76022427e-01  1.26219421e-01 -3.21426244e-01
   4.57390825e-01  1.03285531e-01  3.03376996e-01 -3.93194435e-01
   2.01133538e-02  5.84681415e-02  1.95309208e-01 -1.04337837e-01
  -5.10454494e-03 -2.21290869e-02]
 [-3.27733460e-02 -2.16280242e-02  5.81746175e-01 -4.93016639e-01
   1.69689325e-01  5.76471747e-01  1.71064648e-01  1.36688228e-01
   1.22038034e-02  7.47046586e-02  1.37513605e-02  1.87030018e-02
   3.29445546e-02 -1.29467737e-02 -1.46432022e-02  1.21956086e-02
   3.46209019e-03  2.54272115e-03]
 [-5.16845687e-02 -8.90837542e-02  2.35988151e-01  7.04782145e-01
   3.86938950e-01  3.74619186e-01 -3.59021783e-01 -8.17792526e-02
   2.85061791e-02 -2.44150397e-02 -5.30695583e-02  6.65788615e-02
  -4.79393685e-02 -9.89681605e-03 -1.84352801e-03 -1.03510969e-02
   6.63531149e-03  1.14113489e-02]
 [-5.80127645e-02 -5.08765049e-01  7.44552697e-02 -7.99226140e-02
  -2.19311200e-01 -2.89307619e-02 -1.09060928e-01 -8.17955791e-02
   5.50010905e-01 -5.82258827e-02  2.01869673e-01  1.99287806e-01
  -4.62331800e-01  1.33204122e-01  1.73797165e-01 -8.29881440e-02
  -3.95143520e-02 -4.27211035e-02]
 [-8.94857243e-02 -5.08821208e-01  5.42024985e-02 -4.23570035e-02
   7.61489140e-02 -1.33819526e-01 -1.54907594e-01  4.56562502e-02
   1.59606312e-01  1.52320089e-01  1.26861687e-02 -6.54339122e-01
   4.28220630e-01 -9.02972146e-02 -3.25694781e-02  9.38552664e-02
   1.96927259e-04 -1.34815346e-02]]

 Eigen Values 
%s [9.64104985e+00 3.29869627e+00 1.17183871e+00 1.19739923e+00
 8.59916998e-01 7.84115472e-01 3.72629538e-01 2.56895019e-01
 1.26797293e-01 9.18091536e-02 6.84368623e-02 5.46198698e-02
 3.70337677e-02 2.07847621e-02 1.88231087e-02 1.32661767e-02
 2.77910098e-03 4.41059005e-03]
In [34]:
# Build a list of (eigenvalue, eigenvector) pairs; the eigenvector for
# eigenvalues[index] is column `index` of `eigenvectors`.
eig_pairs = [(eigenvalues[index], eigenvectors[:, index]) for index in range(len(eigenvalues))]

# Order the pairs by eigenvalue only, largest first. Sorting the tuples
# directly would fall back to comparing the eigenvector arrays whenever two
# eigenvalues are equal, which raises ValueError.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
print(eig_pairs)

# Unzip the ordered pairs into parallel lists.
eigvalues_sorted = [pair[0] for pair in eig_pairs]
eigvectors_sorted = [pair[1] for pair in eig_pairs]

print('Eigenvalues in descending order: \n%s' %eigvalues_sorted)
[(9.64104985312631, array([-0.26524981, -0.28374   , -0.30403966, -0.27414394, -0.10419572,
       -0.171618  , -0.3118946 ,  0.31250184, -0.31010735, -0.27575617,
       -0.30566223, -0.31215443, -0.25846428,  0.03596999, -0.03277335,
       -0.05168457, -0.05801276, -0.08948572])), (3.298696269122248, array([-0.10302201,  0.15576752, -0.03627567, -0.1885796 , -0.23549381,
       -0.10020513,  0.06259283, -0.03492182,  0.06869454,  0.14107999,
        0.06135972,  0.04944968,  0.23571734,  0.49504436, -0.02162802,
       -0.08908375, -0.50876505, -0.50882121])), (1.1973992330834702, array([-0.01682128, -0.2103712 ,  0.09114676,  0.00292723, -0.17134734,
       -0.10786554,  0.11710531, -0.10491826,  0.11525974, -0.2060556 ,
        0.11908566,  0.11051917, -0.221618  ,  0.04700176, -0.49301664,
        0.70478214, -0.07992261, -0.042357  ])), (1.1718387078485022, array([ 2.41878917e-01, -5.11666331e-03,  7.30756307e-02, -2.50161489e-01,
       -6.21822552e-01, -1.97812492e-01,  3.77814890e-02, -2.80012452e-02,
        5.36072098e-02,  1.77903514e-02, -5.41714358e-02,  3.08405978e-02,
        6.86194888e-05, -1.89384724e-01,  5.81746175e-01,  2.35988151e-01,
        7.44552697e-02,  5.42024985e-02])), (0.8599169977079716, array([-0.14809212,  0.08275834,  0.08155735, -0.086632  ,  0.00614117,
        0.77747049, -0.11190404,  0.1642555 , -0.09512993,  0.19809138,
       -0.11192548, -0.13994802,  0.00231352,  0.01556671,  0.16968933,
        0.38693895, -0.2193112 ,  0.07614891])), (0.7841154723760503, array([-0.05942021, -0.10353714, -0.07433939,  0.24967111,  0.50917202,
       -0.222541  , -0.00212654, -0.00870691, -0.0133934 , -0.18985456,
        0.07190079,  0.00223898,  0.00593323,  0.27602243,  0.57647175,
        0.37461919, -0.02893076, -0.13381953])), (0.3726295376446502, array([ 0.16122904, -0.40907045,  0.11529774,  0.09858287, -0.12846959,
        0.39577036,  0.11736031, -0.10562388,  0.11270669, -0.36893805,
        0.20621253,  0.11370464, -0.41392483,  0.12621942,  0.17106465,
       -0.35902178, -0.10906093, -0.15490759])), (0.2568950194548444, array([-0.82724913, -0.02666922,  0.18330636, -0.04628288, -0.05574171,
       -0.03607892,  0.1584799 , -0.15352677,  0.12363147, -0.17573803,
        0.05834921,  0.1645534 ,  0.10952953, -0.32142624,  0.13668823,
       -0.08177925, -0.08179558,  0.04565625])), (0.1267972928557123, array([-0.25863551, -0.00833879, -0.37415878, -0.02132866, -0.25650263,
        0.15674246, -0.00059808, -0.06718824, -0.08731773,  0.04822722,
        0.37312866,  0.04517693,  0.09133313,  0.45739083,  0.0122038 ,
        0.02850618,  0.55001091,  0.15960631])), (0.09180915364321345, array([-0.11112005,  0.07159399, -0.27683387, -0.23538919,  0.16165698,
       -0.07585635,  0.19955312, -0.16571795,  0.2571496 ,  0.43070289,
       -0.23047502,  0.19378755, -0.60462453,  0.10328553,  0.07470466,
       -0.02441504, -0.05822588,  0.15232009])), (0.06843686227401986, array([-0.14318658,  0.04643655,  0.75096561, -0.01871818, -0.01447591,
       -0.17949933, -0.15331492,  0.12470425, -0.18178712,  0.24103735,
        0.08539383, -0.11503619, -0.30647834,  0.303377  ,  0.01375136,
       -0.05306956,  0.20186967,  0.01268617])), (0.05461986976927837, array([-0.0828628 ,  0.20019452, -0.16709712,  0.28143598, -0.07943558,
        0.03452673, -0.05479317,  0.05603121, -0.09052048,  0.22743986,
        0.22964435, -0.07799652, -0.29373764, -0.39319444,  0.018703  ,
        0.06657886,  0.19928781, -0.65433912])), (0.037033767660817486, array([-0.0726179 ,  0.10724336, -0.14959664,  0.57106977, -0.27821877,
       -0.15992551, -0.07964294,  0.10786756, -0.10942836,  0.09982277,
        0.22281907, -0.10900343, -0.17141906,  0.02011335,  0.03294455,
       -0.04793937, -0.4623318 ,  0.42822063])), (0.0207847620703778, array([-0.09622428, -0.51626269,  0.01898898,  0.33322286, -0.12902326,
       -0.01166699, -0.11469443,  0.16135677,  0.52749911,  0.33662114,
       -0.27522586, -0.14116456,  0.20003726,  0.05846814, -0.01294677,
       -0.00989682,  0.13320412, -0.09029721])), (0.01882310874197125, array([-0.04812049,  0.57284054,  0.03610536,  0.22879799, -0.14716102,
        0.07316285, -0.07821108,  0.04407891,  0.37834608, -0.43872853,
       -0.38811808, -0.07791286, -0.13730499,  0.19530921, -0.0146432 ,
       -0.00184353,  0.17379716, -0.03256948])), (0.013266176739548647, array([ 0.03017785,  0.1037472 , -0.02786872, -0.34570645,  0.15641945,
       -0.05117524, -0.2901086 ,  0.24869847,  0.54528818, -0.06552166,
        0.54060673, -0.26733579, -0.05236284, -0.10433784,  0.01219561,
       -0.0103511 , -0.08298814,  0.09385527])), (0.004410590045403887, array([ 0.00725499, -0.01056858, -0.01134463,  0.02337208, -0.01105698,
        0.01481233, -0.76438461, -0.14265648,  0.00566774,  0.01746646,
       -0.02711643,  0.62487892, -0.00222617, -0.02212909,  0.00254272,
        0.01141135, -0.0427211 , -0.01348153])), (0.0027791009835487334, array([-4.75830215e-03, -2.91190097e-02,  8.49468526e-03, -2.56958203e-03,
       -3.57653719e-03,  1.35208987e-02, -2.68058316e-01, -8.09360912e-01,
        1.24450218e-03,  3.12646668e-02, -2.58420584e-02, -5.18284330e-01,
       -5.07417276e-03, -5.10454494e-03,  3.46209019e-03,  6.63531149e-03,
       -3.95143520e-02,  1.96927259e-04]))]
Eigenvalues in descending order: 
[9.64104985312631, 3.298696269122248, 1.1973992330834702, 1.1718387078485022, 0.8599169977079716, 0.7841154723760503, 0.3726295376446502, 0.2568950194548444, 0.1267972928557123, 0.09180915364321345, 0.06843686227401986, 0.05461986976927837, 0.037033767660817486, 0.0207847620703778, 0.01882310874197125, 0.013266176739548647, 0.004410590045403887, 0.0027791009835487334]
In [35]:
# Share of the total variance carried by each component (largest first),
# followed by the running total of those shares.
ordered_eigenvalues = sorted(eigenvalues, reverse=True)
tot = sum(eigenvalues)
var_explained = [value / tot for value in ordered_eigenvalues]
cum_var_exp = np.cumsum(var_explained)
In [36]:
# Scree plot: bars show each component's share of the variance, the step
# line shows the cumulative share.
# Component numbers are derived from the data instead of a hard-coded
# range(1, 19), so the plot still works if the feature count changes.
component_numbers = range(1, len(var_explained) + 1)
plt.bar(component_numbers, var_explained, alpha=0.5, align='center', label='individual explained variance')
plt.step(component_numbers, cum_var_exp, where= 'mid', label='cumulative explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc = 'best')
plt.show()
In [37]:
# NOTE(review): PCA is fitted on all rows, before the train/test split below.
pca = PCA(n_components=9) # project the 18 standardized features onto 9 principal components
X_pca = pca.fit_transform(X_std)
In [38]:
from sklearn import model_selection

# Same 70/30 split and seed as for the full feature set, now on the PCA projection.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X_pca,
    y1,
    test_size=0.30,
    random_state=1,
)
In [39]:
# Refit the same classifier object on the PCA-reduced training split (replaces the earlier fit).
clf.fit(X_train, y_train)
Out[39]:
SVC()
In [40]:
# Score the refitted classifier on the PCA-reduced held-out split.
clf.score(X_test, y_test)
Out[40]:
0.9330708661417323
In [41]:
# Predict labels for the PCA-reduced held-out rows, then compare them with the true labels.
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)
Out[41]:
0.9330708661417323
In [42]:
# Cross-tabulation of true labels (rows) against predicted labels (columns) for the PCA-based model.
pd.crosstab(y_test, y_pred)
Out[42]:
col_0 bus car van
class
bus 58 1 0
car 1 126 6
van 5 4 53
In [43]:
# Per-class precision, recall and F1 for the PCA-based model.
print(metrics.classification_report(y_test, y_pred))
              precision    recall  f1-score   support

         bus       0.91      0.98      0.94        59
         car       0.96      0.95      0.95       133
         van       0.90      0.85      0.88        62

    accuracy                           0.93       254
   macro avg       0.92      0.93      0.92       254
weighted avg       0.93      0.93      0.93       254

--- End----